{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "第四节：数据存储技术 文件与数据库\n",
    "==\n",
    "\n",
    "# 4.1 文件存储技术\n",
    "\n",
    "注意：本章节可以参考python数据获取.ipynb介绍，仅仅是对这一块内容做补充！\n",
    "\n",
    "4.1.1 文件、二进制文件（略）\n",
    "4.1.2 csv json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wes\n",
      "[{'name': 'Scott', 'age': 25, 'pet': 'Zuko'}, {'name': 'Katie', 'age': 33, 'pet': 'Cisco'}]\n",
      "{'name': 'Katie', 'age': 33, 'pet': 'Cisco'}\n",
      "25\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "obj = \"\"\"\n",
    "{\"name\": \"Wes\",\n",
    " \"places_lived\": [\"United States\", \"Spain\", \"Germany\"],\n",
    " \"pet\": null,\n",
    " \"siblings\": [{\"name\": \"Scott\", \"age\": 25, \"pet\": \"Zuko\"},\n",
    "              {\"name\": \"Katie\", \"age\": 33, \"pet\": \"Cisco\"}]\n",
    "}\n",
    "\"\"\"\n",
    "a = json.loads(obj)\n",
    "print(a['name'])\n",
    "print(a.get('siblings'))\n",
    "print(a.get('siblings')[1])\n",
    "print(a.get('age',25))\n",
    "#25表示默认值,如果25不写，则默认None"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "注意：JSON数据需要双引号包围，不能用单引号\n",
    "\n",
    "csv json其他参看**python数据获取.ipynb**文件"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4.2 CSV格式文件 补充\n",
    "\n",
    "A.读写 csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[['id', 'name', 'age'], [], ['10001', 'Mike', '20'], [], ['10002', 'Json', '22'], [], ['10004', 'Pi', '31'], [], ['20320', 'Siren', '20'], []]\n"
     ]
    }
   ],
   "source": [
    "import csv\n",
    "with open(\"4.test.csv\",\"w\") as f:\n",
    "    writer = csv.writer(f,delimiter=',')\n",
    "    writer.writerow(['id','name','age'])\n",
    "    writer.writerow(['10001','Mike',20])\n",
    "    writer.writerows([['10002','Json',22],['10004','Pi',31],['20320','Siren',20]])\n",
    "f.close()\n",
    "with open('4.test.csv','r',encoding='utf-8') as csvfile:\n",
    "    reader = csv.reader(csvfile)\n",
    "    print([i for i in reader])\n",
    "csvfile.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "一般来说爬虫使用了结构化的数据，这里我们最好使用字典实现"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[['id', 'name', 'age'], [], ['100001', 'Muke', '21'], [], ['100002', 'Syn Cole', '30'], [], ['100003', 'Kygo', '30'], [], ['100007', 'Martin', '24'], [], ['123455', 'syc', '19'], []]\n"
     ]
    }
   ],
   "source": [
    "import csv\n",
    "with open('4.test.csv','w') as f:\n",
    "    fin = ['id','name','age']\n",
    "    writer = csv.DictWriter(f,fieldnames=fin)\n",
    "    writer.writeheader()\n",
    "    writer.writerow({'id':'100001','name':'Muke','age':21})\n",
    "    writer.writerows([{'id':'100002','name':'Syn Cole','age':30},\n",
    "                      {'id':'100003','name':'Kygo','age':30},\n",
    "                      {'id':'100007','name':'Martin','age':24},\n",
    "                      {'id':'123455','name':'syc','age':19}])\n",
    "f.close()\n",
    "with open('4.test.csv','r',encoding='utf-8') as csvfile:\n",
    "    reader = csv.reader(csvfile)\n",
    "    print([i for i in reader])\n",
    "csvfile.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4.3 关系型数据库 以MySql为例"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('8.0.20',)\n"
     ]
    }
   ],
   "source": [
    "import pymysql\n",
    "db = pymysql.connect(host='localhost',user='root',password='ATOMBOMB123abc',port=3306)\n",
    "cursor = db.cursor()\n",
    "cursor.execute('SELECT VERSION()')#查询版本号\n",
    "data = cursor.fetchone()\n",
    "cursor.execute('CREATE DATABASE SPY DEFAULT CHARACTER SET utf8')\n",
    "print(data)\n",
    "db.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "PyCharm (PycharmWorkspace)",
   "language": "python",
   "name": "pycharm-7b63a738"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}